*****************************************************************************
*** prod_shampoo.do
***
*****************************************************************************


*** Load product-specific information and read size

* Load the raw shampoo product attribute file.
* The path uses a forward slash: Stata accepts "/" as a directory separator
* on every platform, whereas "\" only works on Windows and can be swallowed
* when it precedes a macro reference.
use "Original_Data/Shampoo_prod_db", clear
drop stubspec1776rc00004

* Expand the abbreviated variable names shipped with the raw file.
rename sy system
rename ge gener
rename vend vendor

* Inspect product types by l2, then keep only regular and dandruff shampoo.
tab producttype l2
keep if producttype=="SHAMPOO" | producttype=="DANDRUFF SHAMPOO"

* Drop rows whose brand field (l5) is NINTENDO GAME BOY.
drop if l5=="NINTENDO GAME BOY"


*** Size ***
* The last whitespace-delimited token of the description (l9) is taken as
* the size token; lastword is reused further down when the product
* description is built.
generate lastword = word(l9, -1)

* OZ = 1 when the size token contains "OZ", 0 otherwise.
generate OZ = regexm(lastword,"OZ")
tabulate OZ

* Numeric size: remove the first "OZ" from the token and convert to a number.
* real() returns missing whenever the remainder is not a valid number.
generate size = real(subinstr(lastword,"OZ","",1))


*** Brand ***
* Start from the raw brand field (l5) and collapse its variants into one
* canonical brand name. Each rule fires only when the raw brand matches the
* first pattern AND the manufacturer field (l4) matches the second pattern.
* Rules are applied in order, so a later rule can overwrite an earlier one.
gen brand = l5

*bysort l4: tab brand

replace brand = "ABBA" if regexm(brand,"^ABBA") & regexm(l4,"^ABBA")
replace brand = "ALAGIO" if regexm(brand,"^ALAGIO") & regexm(l4,"^ALAGIO")
replace brand = "ALBERTO" if regexm(brand,"^ALBERTO") & regexm(l4,"^ALBERTO")
replace brand = "MOTIONS" if regexm(brand,"^MOTIONS") & regexm(l4,"^ALBERTO")
replace brand = "ST IVES" if regexm(brand,"^ST IVES") & regexm(l4,"^ALBERTO")
replace brand = "TCB" if regexm(brand,"^TCB") & regexm(l4,"^ALBERTO")
replace brand = "TRESEMME" if regexm(brand,"^TRESEMME") & regexm(l4,"^ALBERTO")
replace brand = "DOO GRO" if regexm(brand,"^DOO GRO") & regexm(l4,"^ALTMAN")
replace brand = "AMERICAN CREW" if regexm(brand,"^AMERICAN CREW") & regexm(l4,"^AMERICAN CREW")
* The brand pattern here used to be "^", which matches every string and so
* overwrote the AMERICAN CREW rows assigned on the previous line. The rule now
* applies only to the manufacturer's remaining (non AMERICAN CREW) brands.
* NOTE(review): confirm against the raw l5 values that every remaining brand of
* this manufacturer really is Modern Organic Products.
replace brand = "MODERN ORGANIC PRODUCTS" if !regexm(brand,"^AMERICAN CREW") & regexm(l4,"^AMERICAN CREW")
replace brand = "ARTEC" if regexm(brand,"^ARTEC") & regexm(l4,"^ARTEC")
replace brand = "AUBREY" if regexm(brand,"^AUBREY") & regexm(l4,"^AUBREY")
replace brand = "AVEDA" if regexm(brand,"^AVEDA") & regexm(l4,"^AVEDA")
replace brand = "KID CARE" if regexm(brand,"^KID CARE") & regexm(l4,"^BELAE BRANDS")
replace brand = "PHYTO" if regexm(brand,"^PHYTO") & regexm(l4,"^BI-LO INC")
replace brand = "BROCATO" if regexm(brand,"^BROCATO") & regexm(l4,"^BROCATO")
replace brand = "BUMBLE & BUMBLE" if regexm(brand,"^BUMBLE") & regexm(l4,"^BUMBLE AND BUMBLE")
replace brand = "BURTS BEES" if regexm(brand,"^BURTS BEES") & regexm(l4,"^BURT'S BEES")
replace brand = "BURTS BEES" if regexm(brand,"^DOCTOR BURTS") & regexm(l4,"^BURT'S BEES")

replace brand = "MILL CREEK" if regexm(brand,"^MILL CREEK") & regexm(l4,"^CARME INC")

replace brand = "WASH N" if regexm(brand,"^WASH") & regexm(l4,"^CCA INDUSTRIES")
replace brand = "CHARLES WORTHINGTON" if regexm(brand,"^CHARLES") & regexm(l4,"^CHARLES WORTHINGTON")
replace brand = "SELSUN" if regexm(brand,"^SELSUN") & regexm(l4,"^CHATTEM")
replace brand = "AFRICAS BEST" if regexm(brand,"^AFRICAS BEST") & regexm(l4,"^CHEATHAM")
replace brand = "EMPRESS" if regexm(brand,"^EMPRESS") & regexm(l4,"^COLBERTS INC")
replace brand = "CAPRICE" if regexm(brand,"^CAPRICE") & regexm(l4,"^COLGATE")
replace brand = "COLLECTIVE WELLBEING" if regexm(brand,"^COLLECTIVE WELL") & regexm(l4,"^COLLECTIVE WELL")
replace brand = "CREME OF NATURE" if regexm(brand,"^CREME OF NATURE") & regexm(l4,"^COLOMER")
replace brand = "CONAIR" if regexm(brand,"^CONAIR") & regexm(l4,"^CONAIR")
replace brand = "JHERI REDDING" if regexm(brand,"^JHERI") & regexm(l4,"^CONAIR")
replace brand = "COSRICH" if regexm(brand,"^COSRICH") & regexm(l4,"^COSRICH")
replace brand = "DAVINES" if regexm(brand,"^DAVINES") & regexm(l4,"^DAVINES")
replace brand = "ECCO BELLA" if regexm(brand,"^ECCO BELLA") & regexm(l4,"^E B BOTANICALS")
replace brand = "BIOSILK" if regexm(brand,"^BIOSILK") & regexm(l4,"^FAROUK")
replace brand = "FOCUS 21" if regexm(brand,"^FOCUS 21") & regexm(l4,"^FOCUS 21")
replace brand = "FRAMESI" if regexm(brand,"^FRAMESI") & regexm(l4,"^FRAMESI")
replace brand = "STERLING SOLUTIONS" if regexm(brand,"^STERLING SOL") & regexm(l4,"^FRANZUS")
replace brand = "GARNIER FRUCTIS" if regexm(brand,"^GARNIER") & regexm(l4,"^GARNIER")
replace brand = "GIOVANNI" if regexm(brand,"^GIOVANNI") & regexm(l4,"^GIOVANNI")
replace brand = "GOLDWELL" if regexm(brand,"^GOLDWELL") & regexm(l4,"^GOLDWELL")
replace brand = "GLOVER" if regexm(brand,"^GLOVER") & regexm(l4,"^H CLAY GLOVER")
replace brand = "HASK" if regexm(brand,"^HASK") & regexm(l4,"^HASK")
replace brand = "DARA" if regexm(brand,"^DARA") & regexm(l4,"^HEALTHPOINT")
replace brand = "IONIL" if regexm(brand,"^IONIL") & regexm(l4,"^HEALTHPOINT")
replace brand = "SALON SELECTIVES" if regexm(brand,"^SALON SELECTIV") & regexm(l4,"^HELENE CURTIS")
replace brand = "SUAVE" if regexm(brand,"^SUAVE") & regexm(l4,"^HELENE CURTIS")
replace brand = "THERMASILK" if regexm(brand,"^THERMASILK") & regexm(l4,"^HELENE CURTIS")
replace brand = "VIBRANCE" if regexm(brand,"^VIBRANCE") & regexm(l4,"^HELENE CURTIS")
replace brand = "HOME HEALTH" if regexm(brand,"^HOME HEALTH") & regexm(l4,"^HOME HEALTH")
replace brand = "DAX" if regexm(brand,"^DAX") & regexm(l4,"^IMPERIAL DAX")
replace brand = "NICOLE" if regexm(brand,"^NICOLE") & regexm(l4,"^INDUSTRIA COSMETICA")
* The manufacturer pattern was spelled "BEUATY"; [AU][AU] accepts the correct
* spelling "BEAUTY" while still matching the old one in case the raw data
* carries the typo.
replace brand = "KIDS CHOICE" if regexm(brand,"^KIDS CHOICE") & regexm(l4,"^INTERNATIONAL BE[AU][AU]TY NETWORK")
replace brand = "SEDAL" if regexm(brand,"^SEDAL") & regexm(l4,"^INTERNATIONAL GROCERS")
replace brand = "BEACH BLONDE" if regexm(brand,"^BEACH BLONDE") & regexm(l4,"^JOHN FRIEDA")
replace brand = "BRILLIANT BRUNETTE" if regexm(brand,"^BRILLIANT BRUNETTE") & regexm(l4,"^JOHN FRIEDA")
replace brand = "FRIZZ" if regexm(brand,"^FRIZZ") & regexm(l4,"^JOHN FRIEDA")
replace brand = "SHEER BLONDE" if regexm(brand,"^SHEER BLONDE") & regexm(l4,"^JOHN FRIEDA")
replace brand = "PAUL MITCHELL" if (regexm(brand,"^PAUL MITCHELL") | regexm(brand,"^PAUL MITCHLL")) & regexm(l4,"^JOHN PAUL MITCHELL")
replace brand = "JOHNSONS" if regexm(brand,"^JOHNSONS") & regexm(l4,"^JOHNSON & JOHNSON")
replace brand = "JOICO" if regexm(brand,"^JOICO") & regexm(l4,"^JOICO")
replace brand = "KERASTASE" if regexm(brand,"^KERASTASE") & regexm(l4,"^KERASTASE")
replace brand = "VITAL" if regexm(brand,"^VITAL") & regexm(l4,"^KEY BRANDS")
replace brand = "KISS MY FACE" if regexm(brand,"^KISS MY FACE") & regexm(l4,"^KISS MY FACE")
replace brand = "KMS" if regexm(brand,"^KMS") & regexm(l4,"^KMS R")
replace brand = "B IN 10" if regexm(brand,"^B IN 10") & regexm(l4,"^LAMAUR")
replace brand = "LAMAUR" if regexm(brand,"^LAMAUR") & regexm(l4,"^LAMAUR")
replace brand = "JASON" if regexm(brand,"^JASON") & regexm(l4,"^JASON")
* The assigned name used to carry a trailing blank ("LAMAS "), which would
* make exact comparisons against "LAMAS" fail; the blank is removed.
replace brand = "LAMAS" if regexm(brand,"^LAMAS") & regexm(l4,"^LAMAS ")
replace brand = "STYLE" if regexm(brand,"STYLE") & regexm(l4,"^LAMAUR")
replace brand = "LANDER" if regexm(brand,"^LANDER") & regexm(l4,"^LANDER")
replace brand = "LANZA" if regexm(brand,"^LANZA") & regexm(l4,"^LANZA")
replace brand = "DOVE" if regexm(brand,"^DOVE") & regexm(l4,"^LEVER BROTHERS")
replace brand = "NATURES GATE" if regexm(brand,"^NATURES") & regexm(l4,"^LEVLAD")
replace brand = "LOREAL" if regexm(brand,"^LOREAL") & regexm(l4,"^LOREAL")
replace brand = "LORI DAVIS" if regexm(brand,"^LORI DAVIS") & regexm(l4,"^LORI DAVIS")
replace brand = "LUSTERS" if regexm(brand,"^LUSTERS") & regexm(l4,"^LUSTERS")
replace brand = "LUSTRASILK" if regexm(brand,"^LUSTRASILK") & regexm(l4,"^LUSTRASILK")
replace brand = "MARC ANTHONY" if regexm(brand,"^MARC ANTHONY") & regexm(l4,"^MARC ANTHONY")
replace brand = "APPEARANCE" if regexm(brand,"^APPEARANCE") & regexm(l4,"^MARIANNA")
replace brand = "LOGICS" if regexm(brand,"^LOGICS") & regexm(l4,"^MATRIX ESSENTIALS")
replace brand = "MATRIX" if regexm(brand,"^MATRIX") & regexm(l4,"^MATRIX")
replace brand = "VALET" if regexm(brand,"^VALET") & regexm(l4,"^MECHANICAL SERVANTS")
replace brand = "LOONEY TUNES" if regexm(brand,"^LOONEY TUNES") & regexm(l4,"^MINNETONKA")
replace brand = "DEEP EARTH" if regexm(brand,"^DEEP EARTH") & regexm(l4,"^MODERN RSRCH")
replace brand = "NATURELLE" if regexm(brand,"^NATURELLE") & regexm(l4,"^NATUR ELLE")
replace brand = "NATURADE" if regexm(brand,"^NATURADE") & regexm(l4,"^NATURADE")
replace brand = "PROFESSIONAL" if regexm(brand,"^PROFESSIONAL") & regexm(l4,"^NATURE'S THERAPY")
replace brand = "SPHERICAL" if regexm(brand,"^SPHERICAL") & regexm(l4,"^NATURE'S THERAPY")
replace brand = "HANDY SOLUTIONS" if regexm(brand,"^HANDY") & regexm(l4,"^NAVAJO")
replace brand = "NEUTROGENA" if regexm(brand,"^NEUTROGENA") & regexm(l4,"^NEUTROGENA")
replace brand = "LA BELLA" if regexm(brand,"^LA BELLA") & regexm(l4,"^NEWHALL")
replace brand = "NEXXUS" if regexm(brand,"^NEXXUS") & regexm(l4,"^NEXXUS")
replace brand = "NIOXIN" if regexm(brand,"^NIOXIN") & regexm(l4,"^NIOXIN")
replace brand = "ELEMENTS" if regexm(brand,"ELEMENTS") & regexm(l4,"^NYLOOKS")
replace brand = "PERCARA" if regexm(brand,"^PERCARA") & regexm(l4,"^PERCARA")
replace brand = "BINGE" if regexm(brand,"^BINGE") & regexm(l4,"^PH BEAUTY")
replace brand = "JHIRMACK" if regexm(brand,"^JHIRMACK") & regexm(l4,"^PLAYTEX")
replace brand = "PROLINE" if regexm(brand,"^PROLINE") & regexm(l4,"^PRO-LINE")
replace brand = "SOFT & BEAUTIFUL" if regexm(brand,"^SOFT") & regexm(l4,"^PRO-LINE")
replace brand = "AUSSIE" if regexm(brand,"^AUSSIE") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "BACK TO BASICS" if regexm(brand,"^BACK TO BASICS") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "CLAIROL" if regexm(brand,"^CLAIROL") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "INFUSIUM" if regexm(brand,"^INFUSIUM") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "PANTENE" if regexm(brand,"^PANTENE") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "PERT" if regexm(brand,"^PERT") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "PHYSIQUE" if regexm(brand,"^PHYSIQUE") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "WELLA" if regexm(brand,"^WELLA") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "SEBASTIAN" if regexm(brand,"^SEBASTIAN") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "VIDAL" if regexm(brand,"^VS SASSOON") & regexm(l4,"^PROCTER & GAMBLE")
replace brand = "LUCKY KENTUCKY" if regexm(brand,"^LUCKY KENTUCKY") & regexm(l4,"^PROGRESSIVE BEAUTY")
replace brand = "PUREOLOGY" if regexm(brand,"^PUREOLOGY") & regexm(l4,"^PUREOLOGY")
replace brand = "REDKEN" if regexm(brand,"^REDKEN") & regexm(l4,"^REDKEN")
replace brand = "REVLON" if regexm(brand,"^REVLON") & regexm(l4,"^REVLON")
replace brand = "RUSK" if regexm(brand,"^RUSK") & regexm(l4,"^RUSK")
replace brand = "FREEMAN" if regexm(brand,"^FREEMAN") & regexm(l4,"^SARAH MICHAELS")
replace brand = "AGREE" if regexm(brand,"^AGREE") & regexm(l4,"^SCHWARZKOPF")
replace brand = "GOT" if regexm(brand,"^GOT") & regexm(l4,"^SCHWARZKOPF")
replace brand = "SMOOTH N SHINE" if regexm(brand,"^SMOOTH") & regexm(l4,"^SCHWARZKOPF")
replace brand = "HALSA" if regexm(brand,"^HALSA") & regexm(l4,"^SCHWARZKOPF")
replace brand = "SEXY HAIR" if regexm(brand,"SEXY HAIR") & regexm(l4,"^SEXY HAIR")
replace brand = "SHIKAI" if regexm(brand,"^SHIKAI") & regexm(l4,"^SHIKAI")
replace brand = "SOFT SHEEN" if regexm(brand,"^SOFT SHEEN") & regexm(l4,"^SOFT SHEEN")
replace brand = "SORBIE" if regexm(brand,"^SORBIE") & regexm(l4,"^STEPHAN")
replace brand = "MANE N TAIL" if regexm(brand,"MANE N TAIL") & regexm(l4,"^STRAIGHT ARROW")
replace brand = "PROFECTIV" if regexm(brand,"^PROFECTIV") & regexm(l4,"^STRENGTH OF NATURE")
replace brand = "BRECK" if regexm(brand,"^BRECK") & regexm(l4,"^THE DIAL CORPORATION")
replace brand = "AVALON" if regexm(brand,"^AVALON") & regexm(l4,"^THE HAIN CELESTIAL")
replace brand = "TIGI" if regexm(brand,"^TIGI") & regexm(l4,"^TIGI")
replace brand = "UMBERTO GIANNINI" if regexm(brand,"^UMBERTO GIANNINI") & regexm(l4,"^UMBERTO GIANNINI")
replace brand = "SUNSILK" if regexm(brand,"^SUNSILK") & regexm(l4,"^UNILEVER")
replace brand = "PRO VITAMIN" if regexm(brand,"^PRO VITAMIN") & regexm(l4,"^VOGUE")
replace brand = "CABOT" if regexm(brand,"^CABOT") & regexm(l4,"^WEST CABOT")
replace brand = "WHITE RAIN" if regexm(brand,"^WHITE RAIN") & regexm(l4,"^WHITE RAIN")
replace brand = "AURA" if regexm(brand,"^AURA") & regexm(l4,"^ZOTOS")
replace brand = "BAIN DE TERRE" if regexm(brand,"^BAIN DE TERRE") & regexm(l4,"^ZOTOS")
replace brand = "SENSCIENCE" if regexm(brand,"^SENSCIENCE") & regexm(l4,"^ZOTOS")


*********************************************
* Product description = l9 with its trailing size token (lastword) removed.
* lastword is by construction the final word of l9, so it is cut off by
* length from the right-hand end. The earlier subinstr(l9,lastword,"",1)
* removed the FIRST occurrence instead, which corrupts the description
* whenever the token also appears earlier in the string (for example a
* lastword of "2" inside "FOCUS 21").
* rtrim(l9) is used so that trailing blanks in l9 do not shift the cut point.
gen product = rtrim(substr(rtrim(l9), 1, length(rtrim(l9)) - length(lastword)))
*********************************************

*** Product type ****

* Flag descriptions containing the token "DDFSH" and cross-check the flag
* against the recorded product type.
gen ddfsh=regexm(product,"DDFSH")
tab ddfsh
tab ddfsh producttype

*** Flavour ***
* Round-trip through a numeric encoding so that one category can be blanked:
* encode numbers the distinct strings in alphabetical order, mvdecode turns
* code 174 into missing, and decode writes the labels back (missing -> "").
* NOTE(review): 174 is tied to the alphabetical position of one label in the
* current data and shifts if categories are added or removed - confirm with
* "label list flavour" which category it stands for.
encode flavorscent, generate(flavour)
mvdecode flavour, mv(174)
decode flavour, generate(flavourstr)
drop flavorscent
drop flavour


**** Type of shampoo ***
* Same round-trip; here code 77 is the category set to missing.
* NOTE(review): confirm which label code 77 corresponds to.
encode typeofshampoo, generate(shamptype)
mvdecode shamptype, mv(77)
decode shamptype, generate(shamptypestr)
drop typeofshampoo
drop shamptype

**** Type of hair ***
* Inspection only; the variable is not transformed here.
tabulate typeofhaircare

*** Form already included in definition ***
* Same round-trip; here code 12 is the category set to missing.
* NOTE(review): confirm which label code 12 corresponds to.
tabulate form
encode form, generate(Form)
mvdecode Form, mv(12)
decode Form, generate(formstr)
drop form
drop Form

*** Package ***
* Inspection only; package is kept unchanged in the final dataset.
tabulate package

********************************************************
*************							****************
*************   Definition of a product ****************
*************							****************
********************************************************

*** All strings ****
* A product is the description string combined with flavour, shampoo type
* and form; prodcode is the numeric encoding of that combined string.
generate product1 = product + " - " + flavourstr + "-" + shamptypestr + "-" + formstr
encode product1, generate(prodcode)
codebook prodcode

********************************************************
********************************************************

* product is overwritten with the combined string here; it is not among the
* variables retained by the keep statement below.
replace product = product1

sort product1 size


* Reduce the file to the identifying and descriptive variables.
keep upc size product1 prodcode package brand
label variable package "package type"
label variable product1 "product description, and mechanism"

* Uniqueness is checked on the pair (upc, product1).
* NOTE(review): if upc alone is meant to identify a row, "isid upc" would be
* the stricter check - confirm the intended key.
sort upc
isid upc product1


sort prodcode

* NumPacks = number of observations with a non-missing size within each
* product code. Products with exactly one such observation are removed.
* Products whose sizes are all missing have NumPacks == 0 and are retained.
by prodcode: egen NumPacks = count(size)
tabulate NumPacks
keep if NumPacks != 1
